import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Base address of the Sichuan University library site and the three
# news-bulletin listing pages to crawl.
url = "http://lib.scu.edu.cn/"
pageUrl = [f"http://lib.scu.edu.cn/news_bulletin?page={n}" for n in range(3)]

# Accumulators filled in by the crawl loops below.
subUrl = []       # absolute URLs of the individual news articles
information = []  # publication-date text, one entry per article
passage = []      # article body text, one entry per article
imgUrl = []       # URLs of images embedded in article bodies
picNum = 0        # running picture counter (not updated in this section)
title = []        # article titles, parallel to subUrl
pagePicNum = []   # number of images found on each article page

# Crawl each news-bulletin listing page and collect, for every zh-hans
# article link, its absolute URL (into subUrl) and its title (into title).
for page in pageUrl:
    response = requests.get(page)
    # Let requests guess the real charset so Chinese text decodes correctly.
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, "html.parser")
    for link in soup.find_all('a', hreflang='zh-hans'):
        href = link.get('href')
        # Keep only article links of the form "/node/<id>" — the same subset
        # the original regex hack (r'/node.*href' on str(tag)) selected.
        # Skipping non-matching anchors also avoids the IndexError the old
        # code raised when the pattern found nothing.
        if href and href.startswith('/node'):
            # urljoin avoids the "cn//node" double slash that plain
            # string concatenation produced.
            subUrl.append(urljoin(url, href))
            title.append(link.get_text())
# Visit each collected article page and record its publication date,
# body text, embedded image URLs, and per-page image count.
for articleUrl in subUrl:
    response = requests.get(articleUrl)
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, "html.parser")
    dateDiv = soup.find('div', class_='field field--name-field-date-published field--type-datetime field--label-inline')
    # Guard against pages missing the date block instead of crashing
    # with AttributeError on None.
    information.append(dateDiv.get_text() if dateDiv is not None else '')
    body = soup.find('div', property='schema:text')
    if body is None:
        # Keep the parallel lists aligned even when a page has no body.
        passage.append('')
        pagePicNum.append(0)
        continue
    passage.append(body.get_text())
    images = body.find_all('img', border='0')
    for img in images:
        src = img.get('src')
        if src:
            # Read the src attribute directly instead of regexing the
            # serialized tag (the old r"src=(.+?)style" capture left a
            # stray quote/space in the URL and depended on attribute
            # order); urljoin resolves relative paths against the site.
            imgUrl.append(urljoin(url, src))
    pagePicNum.append(len(images))
# print(len(information))
# print(len(title))
# print(len(passage))
# print(pagePicNum)
# print(len(imgUrl))